library(tidyverse)
library(rio)
library(lfe)
library(stargazer)

# Use the replication folder as the working directory.
# NOTE(review): "~replication" looks like a placeholder for the local path to
# the replication folder - confirm and adjust before running.
setwd("~replication")

# Load the long-format data; this is expected to provide `df_long`, which the
# rest of the script works on.
load("data_genderedcost_long.rdata")

# Restrict the sample and fix column types in a single pipeline:
#   1. keep completed answers only (SurveyStatus == 2),
#   2. keep answers given before the deadline - 2021-12-20 21:49:52 was the
#      last response within the time frame,
#   3. store id as a factor and task_number as a number.
# NOTE(review): the deadline is compared as a string; presumably SurveyEndTime
# is an ISO-formatted character or date-time column - confirm.
df_long <- df_long %>%
  dplyr::filter(SurveyStatus == 2) %>%
  dplyr::filter(SurveyEndTime <= "2021-12-20 21:49:52") %>%
  dplyr::mutate(
    id = factor(id),
    task_number = as.numeric(task_number)
  )

########## TEST FOR CARRY-OVER EFFECTS ##########

# Can the previous answer predict the next one?
# Check for a relationship between the choice in one round (x) and the choice
# in the following round (y).

# df_long has two rows per round per individual, so simply grouping by id and
# ordering by task number would not line up consecutive rounds. Keeping only
# the rows with choice == 1 gives, by definition, one row per subject per
# round, and profile_choice on that row still holds the information about
# which profile was chosen.
df_long_lead <- df_long %>%
  filter(choice == 1) %>%
  # sort by round so that, within each subject, rows are in task order
  arrange(task_number) %>%
  group_by(id) %>%
  mutate(
    # recode from 1/2 to 0/1 to make the estimates more intuitive
    profile_choice = profile_choice - 1,
    # the same subject's (recoded) choice in the following round;
    # NA on the subject's last round, which has no following round
    next_profile_choice = lead(profile_choice, 1),
    # outcome: 1 if the subject picks the same profile position again in the
    # following round, 0 otherwise (NA where there is no following round)
    reselecting = as.numeric(profile_choice == next_profile_choice)
  ) %>%
  # drop the grouping so it does not leak into later steps
  ungroup()


# Regress reselecting on the previous choice with a control for task number.
# The formula parts after the first "|" are: no fixed effects (0), no
# instruments (0), and standard errors clustered by candidate (id).
carry_over <- felm(
  reselecting ~ profile_choice + task_number | 0 | 0 | id,
  data = df_long_lead
)

# Export the regression table as LaTeX. The task_number coefficient is omitted
# from the table and reported instead as a "Control for task number" row.
stargazer(
  carry_over,
  out = "table_carry_over.tex",
  label = "tab:carry_over",
  dep.var.labels = "Reselecting choice",
  covariate.labels = c("Previous choice", "Constant"),
  omit = "task_number",
  add.lines = list(c("Control for task number", "Yes")),
  keep.stat = "n"
)


######## TEST THAT A/B PREFERENCES ARE BALANCED ###########
### i.e. check whether candidates tend to over-select the left- or right-hand profile.

# Mean of choice for each value of profile (printed to the console)
summarise(group_by(df_long, profile), mean(choice))

# Result: A is chosen 50.4 percent of the time and B is chosen 49.6 percent of the time

# Test whether the two shares differ: regress choice on profile with a control
# for task order. The formula parts after the first "|" are: no fixed effects
# (0), no instruments (0), and standard errors clustered by candidate (id).
ab_test <- felm(
  choice ~ profile + task_number | 0 | 0 | id,
  data = df_long
)

# Export the regression table as LaTeX. The task_number coefficient is omitted
# from the table and reported instead as a "Control for task number" row.
stargazer(
  ab_test,
  out = "table_ab_test.tex",
  label = "tab:ab_test",
  dep.var.labels = "Choice",
  covariate.labels = c("Right Hand Profile", "Constant"),
  omit = "task_number",
  add.lines = list(c("Control for task number", "Yes")),
  keep.stat = "n"
)
